clear
set more off
capture log close


/* SEQUENTIAL MODEL */
local model_descr "Sequential model: decomposition for parents only (pi from Yc, force pi1 same on eta_ep, eta_p)"


* CHOOSE subsample  

  *income years for children (not survey years)
  loc yrmin=${yrmin} // earliest possible=1985	
  loc yrmax=${yrmax} // latest possible=2018
  
	*STUBNAME for saving log file and output matrix/dataset 
	local modelnum "2b"
	global stubfilenm "model_`modelnum'_sequential_`yrmin'_`yrmax'"
  
  
	*LOG file 
	log using ${us_results}/${stubfilenm}.log, replace
  
  *child ages to observe their adult incomes
  loc agemin=25 // youngest possible=25
  loc agemax=48 // oldest possible=48
  
  *child birth cohorts to use
  loc cohortmin=1952 // earliest possible=1952
  loc cohortmax=1993 // latest possible=1993



*USE the ranked MAIN SAMPLE 
use *newid *LAB* AGE *cohort *LABYR* *MF* *empl* *schmax year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

gen AGEC1=year-cohort-40 
gen f_LABAGE_1=f_LABYR-f_cohort 
gen m_LABAGE_1=m_LABYR-m_cohort 

forval i=2/4{
	gen AGEC`i'=AGEC1^`i'
	gen f_LABAGE_`i'=f_LABAGE_1^`i'
	gen m_LABAGE_`i'=m_LABAGE_1^`i'
}


* SAMPLE selection

* Use mother's sample only
 keep if pm_LAB!=. & m_LABAGE_1!=.
 keep if m_emplavg!=. & m_schmax!=.
 
 
* RESDIUALIZE all measures 
 foreach var in pLAB employ schmax pm_LAB m_emplavg m_schmax  {
	qui regress `var' m_LABAGE_? AGEC? i.year
	predict `var'_r, resid
 }

* Subsample (pool sons and daughters for now)
 keep if inrange(year,`yrmin',`yrmax') & inrange(AGE,`agemin',`agemax')
 tab year, su(AGE)
 
* ESTIMATE MODEL 

scalar drop _all
cap noi drop eta_* 
cap noi drop yhat* 
cap noi drop uhat*

*Sample sizes
qui reg pLAB_r pm_LAB_r, cluster(newid)
scalar N=e(N)
scalar Nkids=e(N_clust)
qui reg pLAB_r pm_LAB_r, cluster(m_newid)
scalar Nmothers=e(N_clust)

scalar list N Nkids Nmothers 


* "IGE"
 reg pLAB_r pm_LAB_r 
 scalar IGE=_b[pm_LAB_r]
	
* Parent employment: Ep = Hp + eta_ep 				
 reg m_emplavg_r m_schmax_r 	
	predict eta_ep, resid 	
	scalar phi=_b[m_schmax_r] 

* Parent income: Yp = O_0p*Ep + O_1p*Hp + eta_p
reg pm_LAB_r m_schmax_r m_emplavg_r  	


* Parent Income: Yp = (O_0p+O_1p)*Hp + O_0p*eta_ep + eta_p		
reg pm_LAB_r m_schmax_r eta_ep
	predict eta_p, resid 
		
	scalar O_p=_b[m_schmax_r] 
	scalar O_0p=_b[eta_ep] 	 
	scalar O_1p=O_p-(O_0p*phi)
	
	scalar list O_p O_0p O_1p phi
	
	
* Check predicted parent income rank.
cap noi drop *_pred
   gen pm_LAB_pred=O_p*m_schmax_r + O_0p*eta_ep + eta_p  		
   sum pm_LAB_r pm_LAB_pred  
   corr pm_LAB_r pm_LAB_pred 
   corr pm_LAB_r pm_LAB_pred, cov 
   reg pLAB_r pm_LAB_r
   reg pLAB_r pm_LAB_pred
               	 
			 	
* Child income to estimate pi
generate eta_epp=(O_0p*eta_ep)+eta_p 
	reg pLAB_r m_schmax_r eta_epp
	scalar pi1= _b[eta_epp] 
	scalar pi2= _b[m_schmax_r]-((_b[eta_epp])*O_p) 
		
scalar list pi1 pi2 

		
	
* DECOMPOSITION 
		

	*Get variances of parent income elements
	 su m_schmax_r
	 scalar vHp=r(Var)
	 su eta_ep 
	 scalar vEta_ep=r(Var)
	 su eta_p 
	 scalar vEta_p=r(Var)
	 scalar list vHp vEta_ep vEta_p 
	 	
	*Calc var(Y) using estimated components
	 scalar vYp=(O_p^2)*vHp + (O_0p^2)*vEta_ep +vEta_p
	 scalar list vYp	 	 
	 qui su pm_LAB_r // check against observed y var
	 di "pm_LAB_r var=" r(Var) 	
	 
	 
	
* IGE calculation
	scalar B= (pi1+(pi2/O_p))*(O_p^2)*(vHp/vYp) + (pi1*(O_0p^2)*(vEta_ep/vYp)) + pi1*(vEta_p/vYp)  
	


*SAVE OUTPUT

clear 
local IGAs		"IGE B"
local pis 		"pi1 pi2"
local varcovs 	"vYp vHp vEta_ep vEta_p"
local param		"O_0p O_1p O_p phi"  
local Ns		"N Nkids Nmothers"

local rownamelist ""
matrix $stubfilenm=(0)
foreach xx in `IGAs' `pis' `varcovs' `param' `Ns' {
	sca y=`xx'
	matrix $stubfilenm=$stubfilenm,y
	local varnamelist "`varnamelist' `xx'"
}
matrix ${stubfilenm}=${stubfilenm}[....,2....]
matrix colnames ${stubfilenm}=`varnamelist'

svmat2 ${stubfilenm}, names(col)
gen model_descr="`model_descr'"
gen modelnum="`modelnum'"
gen yearmin=`yrmin'
gen yearmax=`yrmax'

save ${us_results}/${stubfilenm}.dta, replace



clear
log close 
	
